import os
import re
from urllib.parse import urljoin

import requests as req
from lxml import etree

# Site root used to resolve the links found on listing and detail pages.
_BASE_URL = "https://www.ypppt.com/"

# Directory the downloaded archives are written to.
_DOWNLOAD_DIR = "./bug_file/PPT/"

# Seconds to wait on a request before giving up; without a timeout a single
# stalled connection would block the whole crawl indefinitely.
_REQUEST_TIMEOUT = 30

# Browser-like headers sent with every request (user-agent spoofing).
_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
    "Mobile-User-Agent": "Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36",
}

# Path separators, characters Windows rejects in file names, and control
# characters. Titles are scraped from the page, so they cannot be trusted to
# form a valid (or harmless) path component.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _safe_filename(name):
    """Return *name* with characters unsuitable for a file name replaced by "_"."""
    cleaned = _UNSAFE_FILENAME_CHARS.sub("_", name).strip()
    return cleaned or "unnamed"


def _fetch_html(url):
    """Download *url*, fail on an HTTP error status, and parse the body as HTML."""
    response = req.get(url=url, headers=_HEADERS, timeout=_REQUEST_TIMEOUT)
    response.raise_for_status()
    return etree.HTML(response.text)


def _resolve_archive_url(detail_url):
    """Follow detail page -> download page and return the archive's URL."""
    detail_page = _fetch_html(detail_url)
    download_page_href = detail_page.xpath(
        "//div[@class='infoss']/div[@class='button']/a[@class='down-button']/@href"
    )[0]

    download_page = _fetch_html(urljoin(_BASE_URL, download_page_href))
    archive_href = download_page.xpath(
        "//div[@class='wrapper']/div[@class='box']/ul//a/@href"
    )[0]
    # urljoin leaves an absolute link untouched and resolves a relative one.
    return urljoin(_BASE_URL, archive_href)


def get_list(url, params):
    """Download every archive linked from one listing page.

    For each entry on the listing page at *url*, the entry's detail page and
    download page are followed to find the archive link, and the archive is
    saved as ``<title>.rar`` under ``./bug_file/PPT/`` (created if missing).
    A failure on one entry is printed and skipped; it does not stop the
    remaining entries.

    Args:
        url: Address of the listing page to crawl.
        params: Currently unused; kept so existing callers keep working.

    Returns:
        The number of archives that were downloaded and written to disk.

    Raises:
        requests.RequestException: If the listing page itself cannot be
            fetched (connection error or timeout).
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(_DOWNLOAD_DIR, exist_ok=True)

    # The listing page's status is deliberately not checked: a page past the
    # end of the listing simply yields no entries and therefore a count of 0.
    listing = req.get(url=url, headers=_HEADERS, timeout=_REQUEST_TIMEOUT)
    listing.encoding = "utf-8"
    listing_tree = etree.HTML(listing.text)
    if listing_tree is None:
        return 0

    count = 0
    entries = listing_tree.xpath(
        "//div[@class='wrapper']//ul[@class='posts clear']/li"
    )
    for li in entries:
        # Bound before the try so the error message below never hits an
        # unbound variable or reports the previous entry's title.
        name = "<unknown>"
        try:
            name = li.xpath("./a[2]/text()")[0]
            detail_url = urljoin(_BASE_URL, li.xpath("./a[2]/@href")[0])
            archive_url = _resolve_archive_url(detail_url)

            archive = req.get(
                url=archive_url, headers=_HEADERS, timeout=_REQUEST_TIMEOUT
            )
            # Do not save an HTTP error page as if it were the archive.
            archive.raise_for_status()
            payload = archive.content

            if not payload:
                print(f"\033[0;31;40m\t{name}.rar\t下载失败!!!\033[0m")
                continue

            target = os.path.join(_DOWNLOAD_DIR, _safe_filename(name) + ".rar")
            with open(target, "wb") as wf:
                wf.write(payload)
            print(name + ".rar\t下载完成!!!")
            count += 1
        except Exception as e:
            # Per-entry boundary: report the failure and move on to the next
            # entry rather than aborting the whole page.
            print(f"\033[0;31;40m\t发生异常\t{name}下载失败!!!\033[0m", e)

    return count


if __name__ == '__main__':
    # Script entry point: crawl the listing pages in order and report how many
    # archives were downloaded in total.
    #
    # Page 1 lives at the listing root; later pages are "list-N.html" below it,
    # e.g. https://www.ypppt.com/moban/jianli/list-2.html
    listing_root = "https://www.ypppt.com/moban/jianli/"
    max_pages = 10

    # get_list() requires a params argument; an empty dict is passed here.
    params = {}

    count = 0
    for page in range(1, max_pages + 1):
        url = listing_root if page == 1 else f"{listing_root}list-{page}.html"

        num = get_list(url, params)
        count += num

        # Stop at the first page that produced no downloads.
        if num <= 0:
            break

    print(f"爬取结束，本次成功爬取数据{count}条!!!")